#' Parallel Analysis Based on Multivariate LDSC
#'
#' \code{paLDSC} performs parallel analysis using LDSC-derived genetic (co)variance matrices to determine the number of non-spurious latent dimensions in genomic data. The function compares the eigenvalues from the LDSC matrix to those derived from null matrices generated under the multivariate LDSC sampling distribution.
#'
#' @param S A genetic correlation matrix (\code{S_Stand}) or genetic covariance matrix (\code{S}) from LDSC output.
#'           We recommend using the genetic correlation matrix (\code{S_Stand}) to maximize explained genetic variance.
#' @param V The corresponding multivariate sampling covariance matrix from LDSC (\code{V_Stand} or \code{V}),
#'          matching the scaling of \code{S}.
#' @param r Number of Monte Carlo replications to simulate null correlation matrices. Default is \code{500}.
#' @param p Percentile threshold for significance (default = \code{0.95}). Eigenvalues from \code{S} must exceed this threshold to be considered significant.
#' @param diag Logical. If \code{TRUE}, diagonalized PA is performed assuming uncorrelated sampling errors. Default is \code{FALSE}.
#' @param fa Logical. If \code{TRUE}, also performs exploratory factor analysis and computes eigenvalues from factor solutions. Default is \code{FALSE}.
#' @param fm Factor extraction method used when \code{fa = TRUE}. Default is \code{"minres"}. See \code{psych::fa.parallel()}.
#' @param nfactors Number of factors to extract in factor analysis if \code{fa = TRUE}. Default is \code{1}.
#' @param save.pdf Logical. Whether to save scree plots to a PDF file. Default is \code{FALSE}.
#' @param only.values Logical. If \code{TRUE}, only eigenvalues are returned. Default is \code{FALSE}.
#'
#' @details
#' This method adapts Horn’s (1965) classic parallel analysis approach to LDSC-based genomic data. Eigenvalues from the observed LDSC-derived matrix are compared against distributions of eigenvalues simulated under the null model using the multivariate LDSC sampling distribution. The number of components to retain is determined by comparing each observed eigenvalue to the corresponding percentile threshold from the simulated distribution.
#'
#' If \code{diag = TRUE}, diagonal sampling variances are used to construct null matrices (faster, but less conservative). If \code{diag = FALSE}, the full LDSC sampling covariance matrix \code{V} is used to simulate the null distribution.
#'
#' Factor analysis mode (\code{fa = TRUE}) follows the logic of \code{psych::fa.parallel}, allowing users to inspect common factor solutions.
#'
#' @return A list containing observed eigenvalues, simulated eigenvalue distributions, and the suggested number of factors/components to extract.
#'
#' @references
#' de la Fuente, J., & Tucker-Drob, E. M. (2023). *paLDSC: Parallel Analysis Based on Multivariate LDSC*. \cr
#' \url{https://rpubs.com/JaFuente/paLDSC}
#'
#' Horn, J. L. (1965). A rationale and test for the number of factors in factor analysis. *Psychometrika*, 30(2), 179–185.
#'
#' @examples
#' \dontrun{
#' # Load LDSC output
#' load("LDSC_output_paLDSC_example.RData")
#'
#' # Run paLDSC with 10 replications
#' paLDSC(S = LDSCoutputfull$S_Stand, V = LDSCoutputfull$V_Stand, r = 10)
#' }
#'
#' @seealso \link[psych]{fa.parallel}, \link[stats]{eigen}
#'
#' @export
paLDSC <- function(S = S, V = V, r = NULL, p = NULL, save.pdf = F, diag = F, fa = F,
                   fm = NULL, nfactors = NULL) {
  list.of.packages <- c("ggplot2", "MASS","matrixStats","gdata","psych","matrixStats","egg","ggpubr","Matrix")
  new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
  if(length(new.packages)) install.packages(new.packages)
  invisible(lapply(list.of.packages, library,character.only = TRUE))
  if (is.null(r)){
    r <- 500
  }
  if (is.null(p)){
    p <- .95
  }
  if (is.null(fm)){
    fm <- "minres"
  }
  if (is.null(nfactors)){
    nfactors <- 1
  }
  
  #---- Get Dimensions of Matrices ----#
  k=dim(S)[1] #k phenotypes
  kstar=k*(k+1)/2 #kstar unique variances/covariances
  Svec=lowerTriangle(S,diag=T) #vectorize S
  SNULL=(0*S)
  diag(SNULL)=diag(S)
  SNULLvec=lowerTriangle(SNULL,diag=T) #vectorize S null
  #---- Parallel Analysis ----#
  EIG=as.data.frame(matrix(NA,nrow=k,ncol=r))
  for (i in 1:r) {
    Sample_null=mvrnorm(n=1,mu=SNULLvec,Sigma=V) #Simulate a null vectorized correlation matrix with noise drawn from the multivariate sample distribution
    Sample_null_M=matrix(0,ncol=k,nrow=k) #Turn it back into a matrix
    lowerTriangle(Sample_null_M,diag=T)=Sample_null
    upperTriangle(Sample_null_M,diag=F)=upperTriangle(t(Sample_null_M))
    EIG[,i]=eigen(Sample_null_M)$values #Store the eigenvalues
    cat("Running parallel analysis. Replication number: ",i,"\n")
  }
  #---- Find the p percentile for permuted and observed eigenvalues ----#
  Parallel_values=rowQuantiles(as.matrix(EIG),probs=p)
  Observed_PCA_values=eigen(S)$values
  #Create data frame from observed eigenvalue data
  obs = data.frame(Observed_PCA_values)
  obs$type = c('Observed Data')
  obs$num = c(row.names(obs))
  obs$num = as.numeric(obs$num)
  colnames(obs) = c('eigenvalue', 'type', 'num')
  #Create data frame from permuted data
  simPA = data.frame(Parallel_values)
  simPA$type = paste("Simulated data (",(p*100),"th %ile)",sep = "")
  simPA$num = c(row.names(obs))
  simPA$num = as.numeric(simPA$num)
  colnames(simPA) = c('eigenvalue', 'type', 'num')
  eigendatPA = rbind(obs,simPA)
  nfactPA <- min(which((eigendatPA[1:k,1] < eigendatPA[(k+1):(k*2),1] ) == TRUE))-1
  #Create vector for observed minus permuted data and # of factors
  obsPA <- data.frame(obs[1]-simPA[1])
  obsPA$type = paste("Observed minus simulated data (",(p*100),"th %ile)",sep = "")
  obsPA$num = c(row.names(obs))
  obsPA$num = as.numeric(obsPA$num)
  colnames(obsPA) = c('eigenvalue', 'type', 'num')
  nfactobsPA <- which(obsPA < 0)[1]-1
  
  if (diag == T) { 
    Vd_stand=0*V
    diag(Vd_stand)=diag(V)
    #---- Diagonalized Parallel Analysis ----#
    EIGd=as.data.frame(matrix(NA,nrow=k,ncol=r))
    for (i in 1:r) {
      Sample_null=mvrnorm(n=1,mu=SNULLvec,Sigma=Vd_stand) #Simulate a null vectorized correlation matrtix with noise drawn from the multivariate sample distribution of S
      Sample_null_M=matrix(0,ncol=k,nrow=k) #Turn it back into a matrix
      lowerTriangle(Sample_null_M,diag=T)=Sample_null
      upperTriangle(Sample_null_M,diag=F)=upperTriangle(t(Sample_null_M))
      EIGd[,i]=eigen(Sample_null_M)$values #Store the eigen values
      cat("Running diagonalized parallel analysis. Replication number: ",i,"\n")
    }
    
    #---- Find the p percentil for diagonalized permuted data----#
    Paralleld_values=rowQuantiles(as.matrix(EIGd),probs=p)
    #Create data frame from diagonalized permuted data
    simPAd = data.frame(Paralleld_values)
    simPAd$type = paste("Simulated data (",(p*100),"th %ile)",sep = "")
    simPAd$num = c(row.names(obs))
    simPAd$num = as.numeric(simPAd$num)
    colnames(simPAd) = c('eigenvalue', 'type', 'num')
    eigendatPAd = rbind(obs,simPAd)
    nfactPAd <- min(which((eigendatPAd[1:k,1] < eigendatPAd[(k+1):(k*2),1] ) == TRUE))-1
    #Create vector for observed minus diagonalized permuted data and suggested number of components to be extracted
    obsPAd <- data.frame(obs[1]-simPAd[1])
    obsPAd$type = paste("Observed minus simulated data (",(p*100),"th %ile)",sep = "")
    obsPAd$num = c(row.names(obs))
    obsPAd$num = as.numeric(obsPAd$num)
    colnames(obsPAd) = c('eigenvalue', 'type', 'num')
    nfactobsPAd <- which(obsPAd < 0)[1]-1
  }  
  
  #--- If eigenvalues from FA are required ---#
  if (fa == T) {
    Ssmooth<-as.matrix((nearPD(S, corr = T))$mat) #Smooth S matrix
    Observed_FA_values=fa(Ssmooth, fm = fm, nfactors = nfactors,SMC = FALSE, warnings = FALSE, rotate = "none")$values
    #Create data frame for observed FA eigenvalue data
    obsFA = data.frame(Observed_FA_values)
    obsFA$type = c('Observed Data')
    obsFA$num = c(row.names(obsFA))
    obsFA$num = as.numeric(obsFA$num)
    colnames(obsFA) = c('eigenvalue', 'type', 'num')
    #---- FA Parallel Analysis ----#
    EIGfa=as.data.frame(matrix(NA,nrow=k,ncol=r))
    for (i in 1:r) {
      Sample_null=mvrnorm(n=1,mu=SNULLvec,Sigma=V) #Simulate a null vectorized correlation matrtix with noise drawn from the multivariate sample distribution of S
      Sample_null_M=matrix(0,ncol=k,nrow=k) #Turn it back into a matrix
      lowerTriangle(Sample_null_M,diag=T)=Sample_null
      upperTriangle(Sample_null_M,diag=F)=upperTriangle(t(Sample_null_M))
      Ssmooth<-as.matrix((nearPD(Sample_null_M, corr = T))$mat)
      EIGfa[,i]=fa(Ssmooth, fm = fm, nfactors = nfactors,SMC = FALSE, warnings = FALSE, rotate = "none")$values #store the eigen values
      cat("Running FA parallel analysis. Replication number: ",i,"\n")
    } 
    
    #---- Find the quantile for FA PA, diagonalized FA PA, and observed eigen values ----#
    Parallel_fa_values=rowQuantiles(as.matrix(EIGfa),probs=p)
    #Create data frame from simulated PA FA data
    simPAfa = data.frame(Parallel_fa_values)
    simPAfa$type = paste("Simulated data (",(p*100),"th %ile)",sep = "")
    simPAfa$num = c(row.names(obs))
    simPAfa$num = as.numeric(simPAfa$num)
    colnames(simPAfa) = c('eigenvalue', 'type', 'num')
    eigendatPAfa = rbind(obsFA,simPAfa)
    nfactPAfa <- min(which((eigendatPAfa[1:k,1] < eigendatPAfa[(k+1):(k*2),1]) == TRUE))-1
    if(nfactPAfa==Inf){
      nfactPAfa <- 1
    }
    #Create vector for observed minus fa PA simulated data and # of factors
    obsPAfa <- data.frame(obsFA[1]-simPAfa[1])
    obsPAfa$type = paste("Observed minus simulated data (",(p*100),"th %ile)",sep = "")
    obsPAfa$num = c(row.names(obsFA))
    obsPAfa$num = as.numeric(obsPAfa$num)
    colnames(obsPAfa) = c('eigenvalue', 'type', 'num')
    nfactobsPAfa <- which(obsPAfa < 0)[1]-1
    
    if (diag == T){ 
      #---- FA Diagonalized Parallel Analysis ----#
      EIGdfa=as.data.frame(matrix(NA,nrow=k,ncol=r))
      for (i in 1:r) {
        Sample_null=mvrnorm(n=1,mu=SNULLvec,Sigma=Vd_stand) #simulate a null vectorized correlation matrtix with noise drawn from the multivariate sample distribution of S
        Sample_null_M=matrix(0,ncol=k,nrow=k) #turn it back into a matrix
        lowerTriangle(Sample_null_M,diag=T)=Sample_null
        upperTriangle(Sample_null_M,diag=F)=upperTriangle(t(Sample_null_M))
        Ssmooth<-as.matrix((nearPD(Sample_null_M, corr = T))$mat)
        EIGdfa[,i]=fa(Ssmooth, fm = fm, nfactors =  nfactors,SMC = FALSE, warnings = FALSE,rotate = "none")$values #store the eigen values    cat("Running diagonalized parallel analysis. Replication number: ",i,"\n")
        cat("Running FA diagonalized parallel analysis. Replication number: ",i,"\n")
      }
      #---- Find the quantile for FA PA, diagonalized FA PA, and observed eigen values ----#
      Parallel_dfa_values=rowQuantiles(as.matrix(EIGdfa),probs=p)
      #Create data frame from diagonalized PA FA simulated data
      simPAdfa = data.frame(Parallel_dfa_values)
      simPAdfa$type = paste("Simulated data (",(p*100),"th %ile)",sep = "")
      simPAdfa$num = c(row.names(obs))
      simPAdfa$num = as.numeric(simPAdfa$num)
      colnames(simPAdfa) = c('eigenvalue', 'type', 'num')
      eigendatPAdfa = rbind(obsFA,simPAdfa)
      nfactPAdfa <- min(which((eigendatPAdfa[1:k,1] < eigendatPAdfa[(k+1):(k*2),1] ) == TRUE))-1
      if(nfactPAdfa==Inf){
        nfactPAdfa <- 1
      }
      #Create vector for observed minus diagonalized permuted data and # of factors
      obsPAdfa <- data.frame(obsFA[1]-simPAdfa[1])
      obsPAdfa$type = paste("Observed minus simulated data (",(p*100),"th %ile)",sep = "")
      obsPAdfa$num = c(row.names(obsFA))
      obsPAdfa$num = as.numeric(obsPAdfa$num)
      colnames(obsPAdfa) = c('eigenvalue', 'type', 'num')
      nfactobsPAdfa <- which(obsPAdfa < 0)[1]-1
    }
  }
  #---- Scree plots ----#
  apatheme=theme_bw()+
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          text=element_text(family='serif'),
          legend.title=element_blank(),
          legend.position=c(.7,.8),
          axis.line.x = element_line(color='black'),
          axis.line.y = element_line(color='black'))
  #---- Scree plot ----#
  if (isTRUE(all(diag(S) != rep(1, nrow(S))))){ # Verifica si la condición es verdadera
    pPA = ggplot(eigendatPA, aes(x = num, y = eigenvalue, shape = type)) +
      geom_line() +
      geom_point(size = 3) +
      scale_y_continuous(name = 'Eigenvalue') +
      scale_x_continuous(name = 'Component Number', breaks = min(1:k):max(1:k)) +
      scale_shape_manual(values = c(2, 17)) +
      geom_vline(xintercept = nfactPA, linetype = 'dashed') +
      apatheme
  } else {
    pPA = ggplot(eigendatPA, aes(x = num, y = eigenvalue, shape = type)) +
      geom_line() +
      geom_point(size = 3) +
      scale_y_continuous(name = 'Eigenvalue') +
      scale_x_continuous(name = 'Component Number', breaks = min(1:k):max(1:k)) +
      scale_shape_manual(values = c(2, 17)) +
      geom_hline(yintercept = 1) +
      apatheme
  }
  #---- Scree plot Observed minus permuted ----#
  pobsPA = ggplot(obsPA, aes(x=num, y=eigenvalue, shape=type)) +
    geom_line()+
    geom_point(size=3)+
    scale_y_continuous(name='Difference in Eigenvalues') +
    scale_x_continuous(name='Component Number', breaks=min(1:k):max(1:k))+
    scale_shape_manual(values=c(17)) +
    geom_vline(xintercept = nfactobsPA, linetype = 'dashed')+
    geom_hline(yintercept = 0)+
    apatheme
  
  if (diag == T){
    #---- Scree plot diagonalized PA ----#
    pPAd = ggplot(eigendatPAd, aes(x=num, y=eigenvalue, shape=type)) +
      geom_line()+
      geom_point(size=3)+
      scale_y_continuous(name='Diagonalized Eigenvalue')+
      scale_x_continuous(name='Component Number', breaks=min(1:k):max(1:k))+
      scale_shape_manual(values=c(2,17)) +
      geom_vline(xintercept = nfactPAd, linetype = 'dashed')+
      geom_hline(yintercept = 1)+
      apatheme
    #---- Scree plot Observed minus diagonalized permuted data ----#
    pobsPAd = ggplot(obsPAd, aes(x=num, y=eigenvalue, shape=type)) +
      geom_line()+
      geom_point(size=3)+
      scale_y_continuous(name='Difference in diagonalized Eigenvalues')+
      scale_x_continuous(name='Component Number', breaks=min(1:k):max(1:k))+
      scale_shape_manual(values=c(17)) +
      geom_vline(xintercept = nfactobsPAd, linetype = 'dashed')+
      geom_hline(yintercept = 0)+
      apatheme
  }
  
  if (isTRUE(fa)){
    #---- Scree plot FA PA ----#
    pPAfa = ggplot(eigendatPAfa, aes(x=num, y=eigenvalue, shape=type)) +
      geom_line()+
      geom_point(size=3)+
      scale_y_continuous(name='Eigenvalue from FA Solution')+
      scale_x_continuous(name='Factor Number', breaks=min(1:k):max(1:k))+
      scale_shape_manual(values=c(2,17)) +
      geom_vline(xintercept = nfactPAfa, linetype = 'dashed')+
      geom_hline(yintercept = 1)+
      apatheme
    #---- Scree plot Observed minus FA PA ----#
    pobsPAfa = ggplot(obsPAfa, aes(x=num, y=eigenvalue, shape=type)) +
      geom_line()+
      geom_point(size=3)+
      scale_y_continuous(name='Difference in FA Eigenvalues') +
      scale_x_continuous(name='Factor Number', breaks=min(1:k):max(1:k))+
      scale_shape_manual(values=c(17)) +
      geom_vline(xintercept = nfactobsPAfa, linetype = 'dashed')+
      geom_hline(yintercept = 0)+
      apatheme
    
    if (diag == T){
      #---- Scree plot FA diagonalized PA ----#
      pPAdfa = ggplot(eigendatPAdfa, aes(x=num, y=eigenvalue, shape=type)) +
        geom_line()+
        geom_point(size=3)+
        scale_y_continuous(name='Eigenvalue from diagonalized FA Solution')+
        scale_x_continuous(name='Factor Number', breaks=min(1:k):max(1:k))+
        scale_shape_manual(values=c(2,17)) +
        geom_vline(xintercept = nfactPAdfa, linetype = 'dashed')+
        geom_hline(yintercept = 1)+
        apatheme
      #---- Scree plot Observed minus FA diagonalized PA ----#
      pobsPAdfa = ggplot(obsPAdfa, aes(x=num, y=eigenvalue, shape=type)) +
        geom_line()+
        geom_point(size=3)+
        scale_y_continuous(name='Difference in diagonalized FA Eigenvalues')+
        scale_x_continuous(name='Factor Number', breaks=min(1:k):max(1:k))+
        scale_shape_manual(values=c(17)) +
        geom_vline(xintercept = nfactobsPAdfa, linetype = 'dashed')+
        geom_hline(yintercept = 0)+
        apatheme
    }
  }
  
  print(pPA)
  print(pobsPA)
  cat("------------------------------------------------------------------------","\n")
  cat("Parallel Analysis suggests extracting",nfactPA,"components","\n")
  cat("------------------------------------------------------------------------","\n")
  
  if (diag == T){
    print(pPAd)
    print(pobsPAd)
    cat("Diagonalized Parallel Analysis suggests extracting",nfactPAd,"components","\n")
    cat("------------------------------------------------------------------------","\n")
  }
  
  if (fa == T){
    print(pPAfa)
    print(pobsPAfa)
    cat("------------------------------------------------------------------------","\n")
    cat("Parallel Analysis suggests extracting",nfactPAfa,"factors","\n")
    cat("------------------------------------------------------------------------","\n")
    if (diag == T){
      print(pPAdfa)
      print(pobsPAdfa)
      cat("------------------------------------------------------------------------","\n")
      cat("Diagonalized Parallel Analysis suggests extracting",nfactPAdfa,"factors","\n")
      cat("------------------------------------------------------------------------","\n")
    }
  }
  
  
  if (isTRUE(save.pdf)){
    figurePCA <- ggarrange(pPA, pobsPA, 
                           labels = c(NA,NA),
                           ncol = 1, nrow = 2)
    ggexport(figurePCA, filename = "PA_LDSC.pdf",res = 300, verbose = F)
    if (diag == T){
      figurePCAd <- ggarrange(pPAd, pobsPAd, 
                              labels = c(NA,NA),
                              ncol = 1, nrow = 2)
      ggexport(figurePCAd, filename = "Diagonalized_PA_LDSC.pdf",res = 300, verbose = F)
    }
    if (fa == T){
      figureFA <- ggarrange(pPAfa, pobsPAfa, 
                            labels = c(NA,NA),
                            ncol = 1, nrow = 2)
      ggexport(figureFA, filename = "FA_PA_LDSC.pdf",res = 300, verbose = F)
      if (diag == T){
        figureFAd <- ggarrange(pPAdfa, pobsPAdfa, 
                               labels = c(NA,NA),
                               ncol = 1, nrow = 2)
        ggexport(figureFAd, filename = "FA_Diagonalized_PA_LDSC.pdf",res = 300, verbose = F)
      }
    }
  }
}
